In [1]:
"""Template for showing the results of the last experiment in MLFlow."""

import logging
import numpy as np
import helpsk as hlp
import pandas as pd
import plotly_express as px
from helpsk.utility import read_pickle, Timer
from helpsk.sklearn_eval import MLExperimentResults

from source.service.model_registry import ModelRegistry

%cd /code

from source.config import config  # noqa: E402
logging.config.fileConfig(
    "source/config/logging_to_file.conf",
    defaults={'logfilename': 'output/log.log'},
    disable_existing_loggers=False,
)
/usr/local/lib/python3.11/site-packages/IPython/core/magics/osm.py:417: UserWarning: using dhist requires you to install the `pickleshare` library.
  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]
/code

Get Latest Experiment Run from MLflow¶

In [2]:
# Connect to the MLflow tracking server and fetch the configured experiment,
# then log the identifying details of its most recent run.
registry = ModelRegistry(tracking_uri=config.experiment_server_url())
experiment = registry.get_experiment_by_name(exp_name=config.experiment_name())
for message in (
    f"Experiment id: {experiment.last_run.exp_id}",
    f"Experiment name: {experiment.last_run.exp_name}",
    f"Run id: {experiment.last_run.run_id}",
    f"Metric(s): {experiment.last_run.metrics}",
):
    logging.info(message)
2023-11-24 21:00:44 - INFO     | Experiment id: 1
2023-11-24 21:00:44 - INFO     | Experiment name: credit
2023-11-24 21:00:44 - INFO     | Run id: bec9459300864e7d97c251a58ed7a3dd
2023-11-24 21:00:44 - INFO     | Metric(s): {'roc_auc': 0.7746736712183807}

Last Run vs Production¶

What is the metric/performance of the model associated with the last run?

In [3]:
# Candidate: metrics recorded for the most recent experiment run.
logging.info(f"last run metrics: {experiment.last_run.metrics}")
2023-11-24 21:00:44 - INFO     | last run metrics: {'roc_auc': 0.7746736712183807}

What is the metric/performance of the model in production?

In [4]:
# Baseline: metrics recorded for the model currently registered as production.
production_run = registry.get_production_run(model_name=config.model_name())
logging.info(f"production run metrics: {production_run.metrics}")
2023-11-24 21:00:44 - INFO     | production run metrics: {'roc_auc': 0.7746736712183807}

Last Run¶

In [5]:
# Underlying mlflow object — the raw `Run` entity (params, metrics, tags, run info).
experiment.last_run.mlflow_entity
Out[5]:
<Run: data=<RunData: metrics={'roc_auc': 0.7746736712183807}, params={'model__criterion': 'entropy',
 'model__max_depth': '99',
 'model__max_features': '0.031837350792579364',
 'model__max_samples': '0.9248344222191298',
 'model__min_samples_leaf': '4',
 'model__min_samples_split': '16',
 'model__n_estimators': '1235',
 'prep__numeric__imputer__transformer': "SimpleImputer(strategy='most_frequent')",
 'prep__numeric__pca__transformer': "PCA(n_components='mle')",
 'prep__numeric__scaler__transformer': 'None',
 'prep__savings_status__savings_encoder__transformer': 'SavingsStatusEncoder()'}, tags={'mlflow.log-model.history': '[{"run_id": "bec9459300864e7d97c251a58ed7a3dd", '
                             '"artifact_path": "model", "utc_time_created": '
                             '"2023-11-24 21:00:40.606806", "flavors": '
                             '{"python_function": {"model_path": "model.pkl", '
                             '"predict_fn": "predict", "loader_module": '
                             '"mlflow.sklearn", "python_version": "3.11.6", '
                             '"env": {"conda": "conda.yaml", "virtualenv": '
                             '"python_env.yaml"}}, "sklearn": '
                             '{"pickled_model": "model.pkl", '
                             '"sklearn_version": "1.3.2", '
                             '"serialization_format": "cloudpickle", "code": '
                             'null}}, "model_uuid": '
                             '"b873cb09a21a41faabfe8a316845d66d", '
                             '"mlflow_version": "2.8.0", "model_size_bytes": '
                             '4509427}]',
 'mlflow.note.content': '2023_11_24_21_00_17',
 'mlflow.runName': '2023_11_24_21_00_17',
 'mlflow.source.git.commit': '81a963fcbc4794b8b7bc6c330fc6b034760eb65d',
 'mlflow.source.name': 'source/entrypoints/cli.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'root',
 'type': 'BayesSearchCV'}>, info=<RunInfo: artifact_uri='/code/mlflow-artifact-root/1/bec9459300864e7d97c251a58ed7a3dd/artifacts', end_time=1700859642241, experiment_id='1', lifecycle_stage='active', run_id='bec9459300864e7d97c251a58ed7a3dd', run_name='2023_11_24_21_00_17', run_uuid='bec9459300864e7d97c251a58ed7a3dd', start_time=1700859617077, status='FINISHED', user_id='root'>, inputs=<RunInputs: dataset_inputs=[]>>

Load Training & Test Data Info¶

In [6]:
with Timer("Loading training/test datasets"):
    X_train = experiment.last_run.download_artifact(artifact_name='x_train.pkl', read_from=read_pickle)  # noqa
    X_test = experiment.last_run.download_artifact(artifact_name='x_test.pkl', read_from=read_pickle)  # noqa
    y_train = experiment.last_run.download_artifact(artifact_name='y_train.pkl', read_from=read_pickle)  # noqa
    y_test = experiment.last_run.download_artifact(artifact_name='y_test.pkl', read_from=read_pickle)  # noqa
Timer Started: Loading training/test datasets
Timer Finished (0.01 seconds)
In [7]:
logging.info(f"training X shape: {X_train.shape}")
logging.info(f"training y length: {len(y_train)}")

logging.info(f"test X shape: {X_test.shape}")
logging.info(f"test y length: {len(y_test)}")
2023-11-24 21:00:44 - INFO     | training X shape: (800, 20)
2023-11-24 21:00:44 - INFO     | training y length: 800
2023-11-24 21:00:44 - INFO     | test X shape: (200, 20)
2023-11-24 21:00:44 - INFO     | test y length: 200
In [8]:
# Class counts for the training labels.
np.unique(y_train, return_counts=True)
Out[8]:
(array([0, 1]), array([559, 241]))
In [9]:
# Class balance of the training labels; compute np.unique once instead of twice.
_, _train_counts = np.unique(y_train, return_counts=True)
train_y_proportion = _train_counts / np.sum(_train_counts)
logging.info(f"balance of y in training: {train_y_proportion}")
2023-11-24 21:00:44 - INFO     | balance of y in training: [0.69875 0.30125]
In [10]:
# Class balance of the test labels; compute np.unique once instead of twice.
_, _test_counts = np.unique(y_test, return_counts=True)
test_y_proportion = _test_counts / np.sum(_test_counts)
logging.info(f"balance of y in test: {test_y_proportion}")
2023-11-24 21:00:44 - INFO     | balance of y in test: [0.705 0.295]

Cross Validation Results¶

Best Scores/Params¶

In [11]:
# Load the archived cross-validation results (all search trials) for the last run.
results = experiment.last_run.download_artifact(
    artifact_name='experiment.yaml',
    read_from=MLExperimentResults.from_yaml_file,
)
logging.info(f"Best Score: {results.best_score}")
logging.info(f"Best Params: {results.best_params}")
2023-11-24 21:00:44 - INFO     | Best Score: 0.7746736712183807
2023-11-24 21:00:44 - INFO     | Best Params: {'model': 'ExtraTreesClassifier()', 'max_features': 0.031837350792579364, 'max_depth': 99, 'n_estimators': 1235, 'min_samples_split': 16, 'min_samples_leaf': 4, 'max_samples': 0.9248344222191298, 'criterion': 'entropy', 'imputer': "SimpleImputer(strategy='most_frequent')", 'scaler': 'None', 'pca': "PCA('mle')", 'savings_status_encoder': 'SavingsStatusEncoder()'}
In [12]:
# Best model from each model-type.
data = results.to_formatted_dataframe(return_style=False, include_rank=True)
data["model_rank"] = data.groupby("model")["roc_auc Mean"].rank(method="first", ascending=False)
data.query('model_rank == 1')
Out[12]:
rank roc_auc Mean roc_auc 95CI.LO roc_auc 95CI.HI model C max_features max_depth n_estimators min_samples_split ... colsample_bytree colsample_bylevel reg_alpha reg_lambda num_leaves imputer scaler pca savings_status_encoder model_rank
9 1 0.775 0.719 0.830 ExtraTreesClassifier() NaN 0.031837 99.0 1235.0 16.0 ... NaN NaN NaN NaN NaN SimpleImputer(strategy='most_frequent') None PCA('mle') SavingsStatusEncoder() 1.0
3 2 0.766 0.713 0.818 LogisticRegression() 0.014259 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN SimpleImputer(strategy='median') MinMaxScaler() None OneHotEncoder() 1.0
10 4 0.759 0.702 0.815 RandomForestClassifier() NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN SimpleImputer() None None OneHotEncoder() 1.0
18 8 0.745 0.684 0.807 XGBClassifier() NaN NaN 4.0 1414.0 NaN ... 0.62372 0.542292 0.783887 1.242691 NaN SimpleImputer(strategy='most_frequent') None None OneHotEncoder() 1.0
22 13 0.735 0.701 0.769 LGBMClassifier() NaN NaN NaN NaN NaN ... 0.61672 NaN 10.572440 26.392904 255.0 SimpleImputer() None PCA('mle') SavingsStatusEncoder() 1.0

5 rows × 26 columns

In [13]:
# Styled leaderboard of every trial; num_rows=500 ensures no trials are truncated.
results.to_formatted_dataframe(
    return_style=True,
    include_rank=True,
    num_rows=500,
)
Out[13]:
rank roc_auc Mean roc_auc 95CI.LO roc_auc 95CI.HI model C max_features max_depth n_estimators min_samples_split min_samples_leaf max_samples criterion learning_rate min_child_weight subsample colsample_bytree colsample_bylevel reg_alpha reg_lambda num_leaves imputer scaler pca savings_status_encoder
1 0.775 0.719 0.830 ExtraTreesClassifier() <NA> 0.032 99.000 1,235.000 16.000 4.000 0.925 entropy <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='most_frequent') None PCA('mle') SavingsStatusEncoder()
2 0.766 0.713 0.818 LogisticRegression() 0.014 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') MinMaxScaler() None OneHotEncoder()
3 0.759 0.711 0.807 LogisticRegression() 0.000 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() MinMaxScaler() PCA('mle') OneHotEncoder()
4 0.759 0.702 0.815 RandomForestClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None None OneHotEncoder()
5 0.757 0.711 0.803 LogisticRegression() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() StandardScaler() None OneHotEncoder()
6 0.753 0.713 0.794 ExtraTreesClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None None OneHotEncoder()
7 0.749 0.704 0.794 RandomForestClassifier() <NA> 0.757 44.000 745.000 33.000 6.000 0.608 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None None SavingsStatusEncoder()
8 0.745 0.684 0.807 XGBClassifier() <NA> <NA> 4.000 1,414.000 <NA> <NA> <NA> <NA> 0.013 1.000 0.841 0.624 0.542 0.784 1.243 <NA> SimpleImputer(strategy='most_frequent') None None OneHotEncoder()
9 0.744 0.708 0.781 ExtraTreesClassifier() <NA> 0.563 71.000 1,725.000 49.000 16.000 0.956 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None None SavingsStatusEncoder()
10 0.740 0.681 0.799 XGBClassifier() <NA> <NA> 2.000 1,671.000 <NA> <NA> <NA> <NA> 0.021 2.000 0.657 0.591 0.780 0.026 3.081 <NA> SimpleImputer(strategy='median') None PCA('mle') OneHotEncoder()
11 0.738 0.710 0.766 ExtraTreesClassifier() <NA> 0.861 52.000 1,995.000 33.000 19.000 0.651 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None PCA('mle') OneHotEncoder()
12 0.738 0.704 0.772 RandomForestClassifier() <NA> 0.528 70.000 1,003.000 37.000 19.000 0.530 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None None OneHotEncoder()
13 0.735 0.701 0.769 LGBMClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 0.338 0.617 <NA> 10.572 26.393 255.000 SimpleImputer() None PCA('mle') SavingsStatusEncoder()
14 0.735 0.683 0.786 XGBClassifier() <NA> <NA> 5.000 1,157.000 <NA> <NA> <NA> <NA> 0.018 3.000 0.694 0.501 0.726 0.033 2.910 <NA> SimpleImputer(strategy='most_frequent') None None OneHotEncoder()
15 0.734 0.678 0.791 LGBMClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 0.612 0.979 <NA> 8.891 7.306 243.000 SimpleImputer() None PCA('mle') SavingsStatusEncoder()
16 0.733 0.705 0.761 ExtraTreesClassifier() <NA> 0.666 19.000 1,346.000 6.000 44.000 0.852 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None PCA('mle') OneHotEncoder()
17 0.733 0.689 0.777 RandomForestClassifier() <NA> 0.422 68.000 1,348.000 4.000 39.000 0.859 entropy <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='most_frequent') None None OneHotEncoder()
18 0.730 0.682 0.778 RandomForestClassifier() <NA> 0.940 40.000 899.000 3.000 16.000 0.696 entropy <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None None OneHotEncoder()
19 0.729 0.644 0.813 LogisticRegression() 0.000 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='most_frequent') StandardScaler() PCA('mle') OneHotEncoder()
20 0.719 0.622 0.817 LogisticRegression() 0.000 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() StandardScaler() PCA('mle') SavingsStatusEncoder()
21 0.719 0.667 0.771 XGBClassifier() <NA> <NA> 13.000 1,222.000 <NA> <NA> <NA> <NA> 0.080 5.000 0.825 0.700 0.977 0.020 1.105 <NA> SimpleImputer(strategy='median') None PCA('mle') OneHotEncoder()
22 0.719 0.628 0.810 LGBMClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None None OneHotEncoder()
23 0.707 0.679 0.736 LGBMClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 0.925 0.859 <NA> 14.754 33.903 400.000 SimpleImputer(strategy='most_frequent') None PCA('mle') SavingsStatusEncoder()
24 0.707 0.657 0.757 XGBClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None None OneHotEncoder()
25 0.696 0.637 0.755 LGBMClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 0.374 0.990 <NA> 16.223 18.789 378.000 SimpleImputer(strategy='most_frequent') None None SavingsStatusEncoder()
In [14]:
# Trials restricted to RandomForestClassifier, ranked best-first.
results.to_formatted_dataframe(query='model == "RandomForestClassifier()"', include_rank=True)
Out[14]:
rank roc_auc Mean roc_auc 95CI.LO roc_auc 95CI.HI max_features max_depth n_estimators min_samples_split min_samples_leaf max_samples criterion imputer savings_status_encoder
1 0.759 0.702 0.815 <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() OneHotEncoder()
2 0.749 0.704 0.794 0.757 44.000 745.000 33.000 6.000 0.608 gini SimpleImputer(strategy='median') SavingsStatusEncoder()
3 0.738 0.704 0.772 0.528 70.000 1,003.000 37.000 19.000 0.530 gini SimpleImputer(strategy='median') OneHotEncoder()
4 0.733 0.689 0.777 0.422 68.000 1,348.000 4.000 39.000 0.859 entropy SimpleImputer(strategy='most_frequent') OneHotEncoder()
5 0.730 0.682 0.778 0.940 40.000 899.000 3.000 16.000 0.696 entropy SimpleImputer(strategy='median') OneHotEncoder()
In [15]:
# Trials restricted to LogisticRegression, ranked best-first.
results.to_formatted_dataframe(query='model == "LogisticRegression()"', include_rank=True)
Out[15]:
rank roc_auc Mean roc_auc 95CI.LO roc_auc 95CI.HI C imputer scaler pca savings_status_encoder
1 0.766 0.713 0.818 0.014 SimpleImputer(strategy='median') MinMaxScaler() None OneHotEncoder()
2 0.759 0.711 0.807 0.000 SimpleImputer() MinMaxScaler() PCA('mle') OneHotEncoder()
3 0.757 0.711 0.803 <NA> SimpleImputer() StandardScaler() None OneHotEncoder()
4 0.729 0.644 0.813 0.000 SimpleImputer(strategy='most_frequent') StandardScaler() PCA('mle') OneHotEncoder()
5 0.719 0.622 0.817 0.000 SimpleImputer() StandardScaler() PCA('mle') SavingsStatusEncoder()

BayesSearchCV Performance Over Time¶

In [16]:
# One performance-over-trials panel per model family.
fig = results.plot_performance_across_trials(facet_by='model')
fig.show()
In [17]:
# Performance over trials for the random-forest trials only.
fig = results.plot_performance_across_trials(query='model == "RandomForestClassifier()"')
fig.show()

Variable Performance Over Time¶

In [18]:
# How each hyper-parameter's sampled values evolved across the search trials.
fig = results.plot_parameter_values_across_trials(query='model == "RandomForestClassifier()"')
fig.show()

Scatter Matrix¶

In [19]:
# NOTE(review): commented-out cell — either re-enable or delete it; the reason
# it is disabled is not recorded here.
# results.plot_scatter_matrix(query='model == "RandomForestClassifier()"',
#                             height=1000, width=1000).show()

Variable Performance - Numeric¶

In [20]:
# NOTE(review): no `.show()` here, unlike neighboring plot cells — the figure
# displays via the cell's return value instead; consider adding `.show()` for consistency.
results.plot_performance_numeric_params(query='model == "RandomForestClassifier()"',
                                        height=800)
In [21]:
# Parallel-coordinates view of the random-forest hyper-parameter space.
fig = results.plot_parallel_coordinates(query='model == "RandomForestClassifier()"')
fig.show()

Variable Performance - Non-Numeric¶

In [22]:
# Score distribution for each non-numeric hyper-parameter choice (random forest only).
fig = results.plot_performance_non_numeric_params(query='model == "RandomForestClassifier()"')
fig.show()

In [23]:
# Score vs. max_features for the random-forest trials, with max_depth encoded
# as marker size and the savings-status encoder choice as color.
results.plot_score_vs_parameter(
    query='model == "RandomForestClassifier()"',
    parameter='max_features',
    size='max_depth',
    color='savings_status_encoder',
)

In [24]:
# NOTE(review): commented-out cell — either re-enable or delete it.
# results.plot_parameter_vs_parameter(
#     query='model == "XGBClassifier()"',
#     parameter_x='colsample_bytree',
#     parameter_y='learning_rate',
#     size='max_depth'
# )
In [25]:
# NOTE(review): commented-out cell (near-duplicate of the one above, differing
# only in `size`) — either re-enable or delete it.
# results.plot_parameter_vs_parameter(
#     query='model == "XGBClassifier()"',
#     parameter_x='colsample_bytree',
#     parameter_y='learning_rate',
#     size='imputer'
# )

Last Run - Test Set Performance¶

In [26]:
# Download the fitted model artifact logged by the last run; read_pickle
# deserializes it (pickle is acceptable here — our own MLflow artifact store).
last_model = experiment.last_run.download_artifact(
    artifact_name='model/model.pkl',
    read_from=read_pickle,
)
print(type(last_model.model))
<class 'sklearn.pipeline.Pipeline'>
In [27]:
# Rich repr of the wrapped sklearn pipeline (preprocessing + estimator).
last_model
Out[27]:
SklearnModelWrapper(model=Pipeline(steps=[('prep',
                                           ColumnTransformer(transformers=[('numeric',
                                                                            Pipeline(steps=[('imputer',
                                                                                             TransformerChooser(transformer=SimpleImputer(strategy='most_frequent'))),
                                                                                            ('scaler',
                                                                                             TransformerChooser()),
                                                                                            ('pca',
                                                                                             TransformerChooser(transformer=PCA(n_components='mle')))]),
                                                                            ['duration',
                                                                             'credit_amount',
                                                                             'installment_commitment',
                                                                             'resid...
                                                                            Pipeline(steps=[('savings_encoder',
                                                                                             TransformerChooser(transformer=<source.library.pipeline.SavingsStatusEncoder object at 0xffff25d4d090>))]),
                                                                            ['savings_status'])])),
                                          ('model',
                                           ExtraTreesClassifier(bootstrap=True,
                                                                criterion='entropy',
                                                                max_depth=99,
                                                                max_features=0.031837350792579364,
                                                                max_samples=0.9248344222191298,
                                                                min_samples_leaf=4,
                                                                min_samples_split=16,
                                                                n_estimators=1235,
                                                                random_state=42))]))
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
SklearnModelWrapper(model=Pipeline(steps=[('prep',
                                           ColumnTransformer(transformers=[('numeric',
                                                                            Pipeline(steps=[('imputer',
                                                                                             TransformerChooser(transformer=SimpleImputer(strategy='most_frequent'))),
                                                                                            ('scaler',
                                                                                             TransformerChooser()),
                                                                                            ('pca',
                                                                                             TransformerChooser(transformer=PCA(n_components='mle')))]),
                                                                            ['duration',
                                                                             'credit_amount',
                                                                             'installment_commitment',
                                                                             'resid...
                                                                            Pipeline(steps=[('savings_encoder',
                                                                                             TransformerChooser(transformer=<source.library.pipeline.SavingsStatusEncoder object at 0xffff25d4d090>))]),
                                                                            ['savings_status'])])),
                                          ('model',
                                           ExtraTreesClassifier(bootstrap=True,
                                                                criterion='entropy',
                                                                max_depth=99,
                                                                max_features=0.031837350792579364,
                                                                max_samples=0.9248344222191298,
                                                                min_samples_leaf=4,
                                                                min_samples_split=16,
                                                                n_estimators=1235,
                                                                random_state=42))]))
Pipeline(steps=[('prep',
                 ColumnTransformer(transformers=[('numeric',
                                                  Pipeline(steps=[('imputer',
                                                                   TransformerChooser(transformer=SimpleImputer(strategy='most_frequent'))),
                                                                  ('scaler',
                                                                   TransformerChooser()),
                                                                  ('pca',
                                                                   TransformerChooser(transformer=PCA(n_components='mle')))]),
                                                  ['duration', 'credit_amount',
                                                   'installment_commitment',
                                                   'residence_since', 'age',
                                                   'existin...
                                                  Pipeline(steps=[('savings_encoder',
                                                                   TransformerChooser(transformer=<source.library.pipeline.SavingsStatusEncoder object at 0xffff25d4d090>))]),
                                                  ['savings_status'])])),
                ('model',
                 ExtraTreesClassifier(bootstrap=True, criterion='entropy',
                                      max_depth=99,
                                      max_features=0.031837350792579364,
                                      max_samples=0.9248344222191298,
                                      min_samples_leaf=4, min_samples_split=16,
                                      n_estimators=1235, random_state=42))])
ColumnTransformer(transformers=[('numeric',
                                 Pipeline(steps=[('imputer',
                                                  TransformerChooser(transformer=SimpleImputer(strategy='most_frequent'))),
                                                 ('scaler',
                                                  TransformerChooser()),
                                                 ('pca',
                                                  TransformerChooser(transformer=PCA(n_components='mle')))]),
                                 ['duration', 'credit_amount',
                                  'installment_commitment', 'residence_since',
                                  'age', 'existing_credits',
                                  'num_dependen...
                                 ['checking_status', 'credit_history',
                                  'purpose', 'employment', 'personal_status',
                                  'other_parties', 'property_magnitude',
                                  'other_payment_plans', 'housing', 'job',
                                  'own_telephone', 'foreign_worker']),
                                ('savings_status',
                                 Pipeline(steps=[('savings_encoder',
                                                  TransformerChooser(transformer=<source.library.pipeline.SavingsStatusEncoder object at 0xffff25d4d090>))]),
                                 ['savings_status'])])
['duration', 'credit_amount', 'installment_commitment', 'residence_since', 'age', 'existing_credits', 'num_dependents']
TransformerChooser(transformer=SimpleImputer(strategy='most_frequent'))
SimpleImputer(strategy='most_frequent')
SimpleImputer(strategy='most_frequent')
TransformerChooser()
TransformerChooser(transformer=PCA(n_components='mle'))
PCA(n_components='mle')
PCA(n_components='mle')
['checking_status', 'credit_history', 'purpose', 'employment', 'personal_status', 'other_parties', 'property_magnitude', 'other_payment_plans', 'housing', 'job', 'own_telephone', 'foreign_worker']
OneHotEncoder(handle_unknown='ignore')
['savings_status']
TransformerChooser(transformer=<source.library.pipeline.SavingsStatusEncoder object at 0xffff25d4d090>)
ExtraTreesClassifier(bootstrap=True, criterion='entropy', max_depth=99,
                     max_features=0.031837350792579364,
                     max_samples=0.9248344222191298, min_samples_leaf=4,
                     min_samples_split=16, n_estimators=1235, random_state=42)
In [28]:
# Predicted scores for the holdout set — values fall in [0, 1] and are compared
# against a 0.37 threshold below; presumably positive-class probabilities (confirm
# against SklearnModelWrapper.predict).
test_predictions = last_model.predict(X_test)
test_predictions[0:10]  # peek at the first few scores
Out[28]:
array([0.32405319, 0.33579314, 0.4087151 , 0.29978924, 0.22212748,
       0.32367477, 0.25097886, 0.32317153, 0.24301977, 0.23767955])
In [29]:
# NOTE(review): 0.37 is a hand-picked score threshold — consider deriving it from
# the precision/recall tradeoff plots below and storing it in config.
evaluator = hlp.sklearn_eval.TwoClassEvaluator(
    actual_values=y_test,
    predicted_scores=test_predictions,
    score_threshold=0.37,
)
In [30]:
# Histogram of predicted scores split by actual class.
evaluator.plot_actual_vs_predict_histogram()
In [31]:
# Confusion matrix at the 0.37 threshold chosen above.
evaluator.plot_confusion_matrix()
No description has been provided for this image
In [32]:
# Full metric table, with dummy-classifier baselines ('prior' and 'constant') for context.
evaluator.all_metrics_df(return_style=True,
                         dummy_classifier_strategy=['prior', 'constant'],
                         round_by=3)
Out[32]:
  Score Dummy (prior) Dummy (constant) Explanation
AUC 0.777 0.500 0.500 Area under the ROC curve (true pos. rate vs false pos. rate); ranges from 0.5 (purely random classifier) to 1.0 (perfect classifier)
True Positive Rate 0.203 0.000 1.000 20.3% of positive instances were correctly identified.; i.e. 12 "Positive Class" labels were correctly identified out of 59 instances; a.k.a Sensitivity/Recall
True Negative Rate 0.972 1.000 0.000 97.2% of negative instances were correctly identified.; i.e. 137 "Negative Class" labels were correctly identified out of 141 instances
False Positive Rate 0.028 0.000 1.000 2.8% of negative instances were incorrectly identified as positive; i.e. 4 "Negative Class" labels were incorrectly identified as "Positive Class", out of 141 instances
False Negative Rate 0.797 1.000 0.000 79.7% of positive instances were incorrectly identified as negative; i.e. 47 "Positive Class" labels were incorrectly identified as "Negative Class", out of 59 instances
Positive Predictive Value 0.750 0.000 0.295 When the model claims an instance is positive, it is correct 75.0% of the time; i.e. out of the 16 times the model predicted "Positive Class", it was correct 12 times; a.k.a precision
Negative Predictive Value 0.745 0.705 0.000 When the model claims an instance is negative, it is correct 74.5% of the time; i.e. out of the 184 times the model predicted "Negative Class", it was correct 137 times
F1 Score 0.320 0.000 0.456 The F1 score can be interpreted as a weighted average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0.
Precision/Recall AUC 0.569 0.295 0.295 Precision/Recall AUC is calculated with `average_precision` which summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold. See sci-kit learn documentation for caveats.
Accuracy 0.745 0.705 0.295 74.5% of instances were correctly identified
Error Rate 0.255 0.295 0.705 25.5% of instances were incorrectly identified
% Positive 0.295 0.295 0.295 29.5% of the data are positive; i.e. out of 200 total observations; 59 are labeled as "Positive Class"
Total Observations 200 200 200 There are 200 total observations; i.e. sample size
In [33]:
# ROC curve for the last-run model on the test set.
fig = evaluator.plot_roc_auc_curve()
fig.show()
In [34]:
# Precision/recall curve for the last-run model on the test set.
fig = evaluator.plot_precision_recall_auc_curve()
fig.show()
In [35]:
# How each metric moves as the score threshold varies over [0.1, 0.7].
evaluator.plot_threshold_curves(score_threshold_range=(0.1, 0.7)).show()
In [36]:
# Precision vs. recall tradeoff across thresholds in [0.1, 0.6] — useful for
# choosing/justifying the 0.37 threshold.
evaluator.plot_precision_recall_tradeoff(score_threshold_range=(0.1, 0.6)).show()
In [37]:
# Lift/gain table by percentile (the FutureWarning below originates inside helpsk,
# not this notebook).
evaluator.calculate_lift_gain(return_style=True)
/usr/local/lib/python3.11/site-packages/helpsk/sklearn_eval.py:2480: FutureWarning:

The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.

Out[37]:
  Gain Lift
Percentile    
5 0.12 2.37
10 0.22 2.20
15 0.29 1.92
20 0.42 2.12
25 0.51 2.03
30 0.59 1.98
35 0.66 1.89
40 0.69 1.74
45 0.75 1.66
50 0.80 1.59
55 0.81 1.48
60 0.83 1.38
65 0.86 1.33
70 0.90 1.28
75 0.95 1.27
80 0.97 1.21
85 0.98 1.16
90 1.00 1.11
95 1.00 1.05
100 1.00 1.00

Production Model - Test Set Performance¶

In [38]:
# Download the fitted model artifact for the current production run, mirroring
# how the last-run model was loaded above.
production_model = production_run.download_artifact(
    artifact_name='model/model.pkl',
    read_from=read_pickle,
)
print(type(production_model.model))
<class 'sklearn.pipeline.Pipeline'>
In [39]:
# Rich repr of the production pipeline, for side-by-side comparison with the last-run model.
production_model
Out[39]:
SklearnModelWrapper(model=Pipeline(steps=[('prep',
                                           ColumnTransformer(transformers=[('numeric',
                                                                            Pipeline(steps=[('imputer',
                                                                                             TransformerChooser(transformer=SimpleImputer(strategy='most_frequent'))),
                                                                                            ('scaler',
                                                                                             TransformerChooser()),
                                                                                            ('pca',
                                                                                             TransformerChooser(transformer=PCA(n_components='mle')))]),
                                                                            ['duration',
                                                                             'credit_amount',
                                                                             'installment_commitment',
                                                                             'resid...
                                                                            Pipeline(steps=[('savings_encoder',
                                                                                             TransformerChooser(transformer=<source.library.pipeline.SavingsStatusEncoder object at 0xffff25d4d090>))]),
                                                                            ['savings_status'])])),
                                          ('model',
                                           ExtraTreesClassifier(bootstrap=True,
                                                                criterion='entropy',
                                                                max_depth=99,
                                                                max_features=0.031837350792579364,
                                                                max_samples=0.9248344222191298,
                                                                min_samples_leaf=4,
                                                                min_samples_split=16,
                                                                n_estimators=1235,
                                                                random_state=42))]))
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
SklearnModelWrapper(model=Pipeline(steps=[('prep',
                                           ColumnTransformer(transformers=[('numeric',
                                                                            Pipeline(steps=[('imputer',
                                                                                             TransformerChooser(transformer=SimpleImputer(strategy='most_frequent'))),
                                                                                            ('scaler',
                                                                                             TransformerChooser()),
                                                                                            ('pca',
                                                                                             TransformerChooser(transformer=PCA(n_components='mle')))]),
                                                                            ['duration',
                                                                             'credit_amount',
                                                                             'installment_commitment',
                                                                             'resid...
                                                                            Pipeline(steps=[('savings_encoder',
                                                                                             TransformerChooser(transformer=<source.library.pipeline.SavingsStatusEncoder object at 0xffff25d4d090>))]),
                                                                            ['savings_status'])])),
                                          ('model',
                                           ExtraTreesClassifier(bootstrap=True,
                                                                criterion='entropy',
                                                                max_depth=99,
                                                                max_features=0.031837350792579364,
                                                                max_samples=0.9248344222191298,
                                                                min_samples_leaf=4,
                                                                min_samples_split=16,
                                                                n_estimators=1235,
                                                                random_state=42))]))
Pipeline(steps=[('prep',
                 ColumnTransformer(transformers=[('numeric',
                                                  Pipeline(steps=[('imputer',
                                                                   TransformerChooser(transformer=SimpleImputer(strategy='most_frequent'))),
                                                                  ('scaler',
                                                                   TransformerChooser()),
                                                                  ('pca',
                                                                   TransformerChooser(transformer=PCA(n_components='mle')))]),
                                                  ['duration', 'credit_amount',
                                                   'installment_commitment',
                                                   'residence_since', 'age',
                                                   'existin...
                                                  Pipeline(steps=[('savings_encoder',
                                                                   TransformerChooser(transformer=<source.library.pipeline.SavingsStatusEncoder object at 0xffff25d4d090>))]),
                                                  ['savings_status'])])),
                ('model',
                 ExtraTreesClassifier(bootstrap=True, criterion='entropy',
                                      max_depth=99,
                                      max_features=0.031837350792579364,
                                      max_samples=0.9248344222191298,
                                      min_samples_leaf=4, min_samples_split=16,
                                      n_estimators=1235, random_state=42))])
ColumnTransformer(transformers=[('numeric',
                                 Pipeline(steps=[('imputer',
                                                  TransformerChooser(transformer=SimpleImputer(strategy='most_frequent'))),
                                                 ('scaler',
                                                  TransformerChooser()),
                                                 ('pca',
                                                  TransformerChooser(transformer=PCA(n_components='mle')))]),
                                 ['duration', 'credit_amount',
                                  'installment_commitment', 'residence_since',
                                  'age', 'existing_credits',
                                  'num_dependen...
                                 ['checking_status', 'credit_history',
                                  'purpose', 'employment', 'personal_status',
                                  'other_parties', 'property_magnitude',
                                  'other_payment_plans', 'housing', 'job',
                                  'own_telephone', 'foreign_worker']),
                                ('savings_status',
                                 Pipeline(steps=[('savings_encoder',
                                                  TransformerChooser(transformer=<source.library.pipeline.SavingsStatusEncoder object at 0xffff25d4d090>))]),
                                 ['savings_status'])])
['duration', 'credit_amount', 'installment_commitment', 'residence_since', 'age', 'existing_credits', 'num_dependents']
TransformerChooser(transformer=SimpleImputer(strategy='most_frequent'))
SimpleImputer(strategy='most_frequent')
SimpleImputer(strategy='most_frequent')
TransformerChooser()
TransformerChooser(transformer=PCA(n_components='mle'))
PCA(n_components='mle')
PCA(n_components='mle')
['checking_status', 'credit_history', 'purpose', 'employment', 'personal_status', 'other_parties', 'property_magnitude', 'other_payment_plans', 'housing', 'job', 'own_telephone', 'foreign_worker']
OneHotEncoder(handle_unknown='ignore')
['savings_status']
TransformerChooser(transformer=<source.library.pipeline.SavingsStatusEncoder object at 0xffff25d4d090>)
ExtraTreesClassifier(bootstrap=True, criterion='entropy', max_depth=99,
                     max_features=0.031837350792579364,
                     max_samples=0.9248344222191298, min_samples_leaf=4,
                     min_samples_split=16, n_estimators=1235, random_state=42)
In [40]:
# Score the hold-out test set with the production model.
# NOTE(review): `predict` appears to return probability-like scores in [0, 1]
# (see output below), not class labels — confirm against SklearnModelWrapper.
test_predictions = production_model.predict(X_test)
test_predictions[0:10]  # peek at the first 10 scores
Out[40]:
array([0.32405319, 0.33579314, 0.4087151 , 0.29978924, 0.22212748,
       0.32367477, 0.25097886, 0.32317153, 0.24301977, 0.23767955])
In [41]:
# Build a binary-classification evaluator from actual labels vs predicted scores.
# NOTE(review): 0.37 is a magic score threshold — presumably chosen from the
# threshold-tuning plots below; consider hoisting it to a named constant.
evaluator = hlp.sklearn_eval.TwoClassEvaluator(
    actual_values=y_test,
    predicted_scores=test_predictions,
    score_threshold=0.37,
)
In [42]:
# Distribution of predicted scores, split by actual class.
evaluator.plot_actual_vs_predict_histogram()
In [43]:
# Confusion matrix at the evaluator's 0.37 score threshold.
evaluator.plot_confusion_matrix()
No description has been provided for this image
In [44]:
# Summary metrics table, compared against two dummy-classifier baselines.
evaluator.all_metrics_df(return_style=True,
                         dummy_classifier_strategy=['prior', 'constant'],
                         round_by=3)
Out[44]:
  Score Dummy (prior) Dummy (constant) Explanation
AUC 0.777 0.500 0.500 Area under the ROC curve (true pos. rate vs false pos. rate); ranges from 0.5 (purely random classifier) to 1.0 (perfect classifier)
True Positive Rate 0.203 0.000 1.000 20.3% of positive instances were correctly identified.; i.e. 12 "Positive Class" labels were correctly identified out of 59 instances; a.k.a Sensitivity/Recall
True Negative Rate 0.972 1.000 0.000 97.2% of negative instances were correctly identified.; i.e. 137 "Negative Class" labels were correctly identified out of 141 instances
False Positive Rate 0.028 0.000 1.000 2.8% of negative instances were incorrectly identified as positive; i.e. 4 "Negative Class" labels were incorrectly identified as "Positive Class", out of 141 instances
False Negative Rate 0.797 1.000 0.000 79.7% of positive instances were incorrectly identified as negative; i.e. 47 "Positive Class" labels were incorrectly identified as "Negative Class", out of 59 instances
Positive Predictive Value 0.750 0.000 0.295 When the model claims an instance is positive, it is correct 75.0% of the time; i.e. out of the 16 times the model predicted "Positive Class", it was correct 12 times; a.k.a precision
Negative Predictive Value 0.745 0.705 0.000 When the model claims an instance is negative, it is correct 74.5% of the time; i.e. out of the 184 times the model predicted "Negative Class", it was correct 137 times
F1 Score 0.320 0.000 0.456 The F1 score can be interpreted as a weighted average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0.
Precision/Recall AUC 0.569 0.295 0.295 Precision/Recall AUC is calculated with `average_precision` which summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold. See sci-kit learn documentation for caveats.
Accuracy 0.745 0.705 0.295 74.5% of instances were correctly identified
Error Rate 0.255 0.295 0.705 25.5% of instances were incorrectly identified
% Positive 0.295 0.295 0.295 29.5% of the data are positive; i.e. out of 200 total observations; 59 are labeled as "Positive Class"
Total Observations 200 200 200 There are 200 total observations; i.e. sample size
In [45]:
# ROC curve for the production model on the test set.
evaluator.plot_roc_auc_curve().show()
In [46]:
# Precision/recall curve for the production model.
evaluator.plot_precision_recall_auc_curve().show()
In [47]:
# How each metric changes as the score threshold varies from 0.1 to 0.7.
evaluator.plot_threshold_curves(score_threshold_range=(0.1, 0.7)).show()
In [48]:
# Precision vs recall trade-off across score thresholds 0.1-0.6.
evaluator.plot_precision_recall_tradeoff(score_threshold_range=(0.1, 0.6)).show()
In [49]:
# Cumulative gain and lift by score percentile.
evaluator.calculate_lift_gain(return_style=True)
/usr/local/lib/python3.11/site-packages/helpsk/sklearn_eval.py:2480: FutureWarning:

The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.

Out[49]:
  Gain Lift
Percentile    
5 0.12 2.37
10 0.22 2.20
15 0.29 1.92
20 0.42 2.12
25 0.51 2.03
30 0.59 1.98
35 0.66 1.89
40 0.69 1.74
45 0.75 1.66
50 0.80 1.59
55 0.81 1.48
60 0.83 1.38
65 0.86 1.33
70 0.90 1.28
75 0.95 1.27
80 0.97 1.21
85 0.98 1.16
90 1.00 1.11
95 1.00 1.05
100 1.00 1.00

Feature Importance¶

In [50]:
try:
    # Tree-based estimators expose feature_importances_ once fitted; this
    # try/except guards against models that don't provide the attribute.
    importances = production_model.model['model'].feature_importances_
    # Strip the ColumnTransformer prefixes so the bar labels stay readable.
    feature_names = [
        x.replace('non_numeric__', '').replace('numeric__', '')
        for x in production_model.model[:-1].get_feature_names_out()
    ]
    # Sort ascending so the most important features render at the top of the
    # horizontal bar chart; strict=True raises if names and importances
    # ever fall out of sync.
    feature_importances = sorted(
        zip(feature_names, importances, strict=True),
        key=lambda x: x[1],
        reverse=False,
    )
    fig = px.bar(
        pd.DataFrame(feature_importances, columns=['feature', 'importance']).tail(20),
        y='feature',
        x='importance',
        orientation='h',
        height=700,
        width=800,
        title='Feature Importances of Production Model',
    )
    fig.show()
except Exception:
    # A bare `except:` previously swallowed the traceback (and would even
    # catch KeyboardInterrupt); log the full exception so failures here
    # are diagnosable instead of silent.
    logging.exception("Error calculating feature importances.")